package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Set;

import org.apache.lucene.store.Directory;

/**
 * <p>
 * This class implements a {@link MergePolicy} that tries to merge segments into
 * levels of exponentially increasing size, where each level has < mergeFactor
 * segments in it. Whenever a given levle has mergeFactor segments or more in
 * it, they will be merged.
 * </p>
 * 
 * <p>
 * This class is abstract and requires a subclass to define the {@link #size}
 * method which specifies how a segment's size is determined.
 * {@link LogDocMergePolicy} is one subclass that measures size by document
 * count in the segment. {@link LogByteSizeMergePolicy} is another subclass that
 * measures size as the total byte size of the file(s) for the segment.
 * </p>
 */

public abstract class LogMergePolicy extends MergePolicy {

	/**
	 * Defines the allowed range of log(size) for each level. A level is
	 * computed by taking the max segment log size, minuse LEVEL_LOG_SPAN, and
	 * finding all segments falling within that range.
	 */
	public static final double LEVEL_LOG_SPAN = 0.75;

	/**
	 * Default merge factor, which is how many segments are merged at a time
	 */
	public static final int DEFAULT_MERGE_FACTOR = 10;

	/**
	 * Default maximum segment size. A segment of this size or larger will never
	 * be merged.
	 * 
	 * @see setMaxMergeDocs
	 */
	public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE;

	private int mergeFactor = DEFAULT_MERGE_FACTOR;

	long minMergeSize;
	long maxMergeSize;
	int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS;

	private boolean useCompoundFile = true;
	private boolean useCompoundDocStore = true;
	private IndexWriter writer;

	private void message(String message) {
		if (writer != null)
			writer.message("LMP: " + message);
	}

	/**
	 * <p>
	 * Returns the number of segments that are merged at once and also controls
	 * the total number of segments allowed to accumulate in the index.
	 * </p>
	 */
	public int getMergeFactor() {
		return mergeFactor;
	}

	/**
	 * Determines how often segment indices are merged by addDocument(). With
	 * smaller values, less RAM is used while indexing, and searches on
	 * unoptimized indices are faster, but indexing speed is slower. With larger
	 * values, more RAM is used during indexing, and while searches on
	 * unoptimized indices are slower, indexing is faster. Thus larger values (>
	 * 10) are best for batch index creation, and smaller values (< 10) for
	 * indices that are interactively maintained.
	 */
	public void setMergeFactor(int mergeFactor) {
		if (mergeFactor < 2)
			throw new IllegalArgumentException(
					"mergeFactor cannot be less than 2");
		this.mergeFactor = mergeFactor;
	}

	// Javadoc inherited
	public boolean useCompoundFile(SegmentInfos infos, SegmentInfo info) {
		return useCompoundFile;
	}

	/**
	 * Sets whether compound file format should be used for newly flushed and
	 * newly merged segments.
	 */
	public void setUseCompoundFile(boolean useCompoundFile) {
		this.useCompoundFile = useCompoundFile;
	}

	/**
	 * Returns true if newly flushed and newly merge segments are written in
	 * compound file format.
	 * 
	 * @see #setUseCompoundFile
	 */
	public boolean getUseCompoundFile() {
		return useCompoundFile;
	}

	// Javadoc inherited
	public boolean useCompoundDocStore(SegmentInfos infos) {
		return useCompoundDocStore;
	}

	/**
	 * Sets whether compound file format should be used for newly flushed and
	 * newly merged doc store segment files (term vectors and stored fields).
	 */
	public void setUseCompoundDocStore(boolean useCompoundDocStore) {
		this.useCompoundDocStore = useCompoundDocStore;
	}

	/**
	 * Returns true if newly flushed and newly merge doc store segment files
	 * (term vectors and stored fields) are written in compound file format.
	 * 
	 * @see #setUseCompoundDocStore
	 */
	public boolean getUseCompoundDocStore() {
		return useCompoundDocStore;
	}

	public void close() {
	}

	abstract protected long size(SegmentInfo info) throws IOException;

	private boolean isOptimized(SegmentInfos infos, IndexWriter writer,
			int maxNumSegments, Set segmentsToOptimize) throws IOException {
		final int numSegments = infos.size();
		int numToOptimize = 0;
		SegmentInfo optimizeInfo = null;
		for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++) {
			final SegmentInfo info = infos.info(i);
			if (segmentsToOptimize.contains(info)) {
				numToOptimize++;
				optimizeInfo = info;
			}
		}

		return numToOptimize <= maxNumSegments
				&& (numToOptimize != 1 || isOptimized(writer, optimizeInfo));
	}

	/**
	 * Returns true if this single nfo is optimized (has no pending norms or
	 * deletes, is in the same dir as the writer, and matches the current
	 * compound file setting
	 */
	private boolean isOptimized(IndexWriter writer, SegmentInfo info)
			throws IOException {
		return !info.hasDeletions() && !info.hasSeparateNorms()
				&& info.dir == writer.getDirectory()
				&& info.getUseCompoundFile() == useCompoundFile;
	}

	/**
	 * Returns the merges necessary to optimize the index. This merge policy
	 * defines "optimized" to mean only one segment in the index, where that
	 * segment has no deletions pending nor separate norms, and it is in
	 * compound file format if the current useCompoundFile setting is true. This
	 * method returns multiple merges (mergeFactor at a time) so the
	 * {@link MergeScheduler} in use may make use of concurrency.
	 */
	public MergeSpecification findMergesForOptimize(SegmentInfos infos,
			IndexWriter writer, int maxNumSegments, Set segmentsToOptimize)
			throws IOException {
		MergeSpecification spec;

		assert maxNumSegments > 0;

		if (!isOptimized(infos, writer, maxNumSegments, segmentsToOptimize)) {

			// Find the newest (rightmost) segment that needs to
			// be optimized (other segments may have been flushed
			// since optimize started):
			int last = infos.size();
			while (last > 0) {
				final SegmentInfo info = infos.info(--last);
				if (segmentsToOptimize.contains(info)) {
					last++;
					break;
				}
			}

			if (last > 0) {

				spec = new MergeSpecification();

				// First, enroll all "full" merges (size
				// mergeFactor) to potentially be run concurrently:
				while (last - maxNumSegments + 1 >= mergeFactor) {
					spec.add(new OneMerge(
							infos.range(last - mergeFactor, last),
							useCompoundFile));
					last -= mergeFactor;
				}

				// Only if there are no full merges pending do we
				// add a final partial (< mergeFactor segments) merge:
				if (0 == spec.merges.size()) {
					if (maxNumSegments == 1) {

						// Since we must optimize down to 1 segment, the
						// choice is simple:
						if (last > 1 || !isOptimized(writer, infos.info(0)))
							spec.add(new OneMerge(infos.range(0, last),
									useCompoundFile));
					} else if (last > maxNumSegments) {

						// Take care to pick a partial merge that is
						// least cost, but does not make the index too
						// lopsided. If we always just picked the
						// partial tail then we could produce a highly
						// lopsided index over time:

						// We must merge this many segments to leave
						// maxNumSegments in the index (from when
						// optimize was first kicked off):
						final int finalMergeSize = last - maxNumSegments + 1;

						// Consider all possible starting points:
						long bestSize = 0;
						int bestStart = 0;

						for (int i = 0; i < last - finalMergeSize + 1; i++) {
							long sumSize = 0;
							for (int j = 0; j < finalMergeSize; j++)
								sumSize += size(infos.info(j + i));
							if (i == 0
									|| (sumSize < 2 * size(infos.info(i - 1)) && sumSize < bestSize)) {
								bestStart = i;
								bestSize = sumSize;
							}
						}

						spec.add(new OneMerge(infos.range(bestStart, bestStart
								+ finalMergeSize), useCompoundFile));
					}
				}

			} else
				spec = null;
		} else
			spec = null;

		return spec;
	}

	/**
	 * Checks if any merges are now necessary and returns a
	 * {@link MergePolicy.MergeSpecification} if so. A merge is necessary when
	 * there are more than {@link #setMergeFactor} segments at a given level.
	 * When multiple levels have too many segments, this method will return
	 * multiple merges, allowing the {@link MergeScheduler} to use concurrency.
	 */
	public MergeSpecification findMerges(SegmentInfos infos, IndexWriter writer)
			throws IOException {

		final int numSegments = infos.size();
		this.writer = writer;
		message("findMerges: " + numSegments + " segments");

		// Compute levels, which is just log (base mergeFactor)
		// of the size of each segment
		float[] levels = new float[numSegments];
		final float norm = (float) Math.log(mergeFactor);

		final Directory directory = writer.getDirectory();

		for (int i = 0; i < numSegments; i++) {
			final SegmentInfo info = infos.info(i);
			long size = size(info);

			// Floor tiny segments
			if (size < 1)
				size = 1;
			levels[i] = (float) Math.log(size) / norm;
		}

		final float levelFloor;
		if (minMergeSize <= 0)
			levelFloor = (float) 0.0;
		else
			levelFloor = (float) (Math.log(minMergeSize) / norm);

		// Now, we quantize the log values into levels. The
		// first level is any segment whose log size is within
		// LEVEL_LOG_SPAN of the max size, or, who has such as
		// segment "to the right". Then, we find the max of all
		// other segments and use that to define the next level
		// segment, etc.

		MergeSpecification spec = null;

		int start = 0;
		while (start < numSegments) {

			// Find max level of all segments not already
			// quantized.
			float maxLevel = levels[start];
			for (int i = 1 + start; i < numSegments; i++) {
				final float level = levels[i];
				if (level > maxLevel)
					maxLevel = level;
			}

			// Now search backwards for the rightmost segment that
			// falls into this level:
			float levelBottom;
			if (maxLevel < levelFloor)
				// All remaining segments fall into the min level
				levelBottom = -1.0F;
			else {
				levelBottom = (float) (maxLevel - LEVEL_LOG_SPAN);

				// Force a boundary at the level floor
				if (levelBottom < levelFloor && maxLevel >= levelFloor)
					levelBottom = levelFloor;
			}

			int upto = numSegments - 1;
			while (upto >= start) {
				if (levels[upto] >= levelBottom) {
					break;
				}
				upto--;
			}
			message("  level " + levelBottom + " to " + maxLevel + ": "
					+ (1 + upto - start) + " segments");

			// Finally, record all merges that are viable at this level:
			int end = start + mergeFactor;
			while (end <= 1 + upto) {
				boolean anyTooLarge = false;
				for (int i = start; i < end; i++) {
					final SegmentInfo info = infos.info(i);
					anyTooLarge |= (size(info) >= maxMergeSize || info.docCount >= maxMergeDocs);
				}

				if (!anyTooLarge) {
					if (spec == null)
						spec = new MergeSpecification();
					message("    " + start + " to " + end + ": add this merge");
					spec.add(new OneMerge(infos.range(start, end),
							useCompoundFile));
				} else
					message("    "
							+ start
							+ " to "
							+ end
							+ ": contains segment over maxMergeSize or maxMergeDocs; skipping");

				start = end;
				end = start + mergeFactor;
			}

			start = 1 + upto;
		}

		return spec;
	}

	/**
	 * <p>
	 * Determines the largest segment (measured by document count) that may be
	 * merged with other segments. Small values (e.g., less than 10,000) are
	 * best for interactive indexing, as this limits the length of pauses while
	 * indexing to a few seconds. Larger values are best for batched indexing
	 * and speedier searches.
	 * </p>
	 * 
	 * <p>
	 * The default value is {@link Integer#MAX_VALUE}.
	 * </p>
	 * 
	 * <p>
	 * The default merge policy ({@link LogByteSizeMergePolicy}) also allows
	 * you to set this limit by net size (in MB) of the segment, using {@link
	 * LogByteSizeMergePolicy#setMaxMergeMB}.
	 * </p>
	 */
	public void setMaxMergeDocs(int maxMergeDocs) {
		this.maxMergeDocs = maxMergeDocs;
	}

	/**
	 * Returns the largest segment (measured by document count) that may be
	 * merged with other segments.
	 * 
	 * @see #setMaxMergeDocs
	 */
	public int getMaxMergeDocs() {
		return maxMergeDocs;
	}

}
